home *** CD-ROM | disk | FTP | other *** search
/* file file_utils.c ... by ^z -- 870820-0913-...
 * some utility routines for the qndxr project, associated with files...
 */
-
- #include <stdio.h>
- #include <unix.h>
- #include <storage.h>
- #include <strings.h>
- #include <ctype.h>
- #include <proto.h>
- #include "qndxr.2.h"
-
- /* function to write out sorted k & p files based on the doc and ptr
- * arrays in memory....
- *
- * The kfile format is as described in detail elsewhere:
- * the key word, turned into all capital letters and with spaces
- * afterward, of fixed length KEY_LENGTH; and
- * the cumulative count of how many words have passed before, including
- * the current word, a long integer.
- *
- * Function revised 870907-... by ^z to use zbuffer method....
- */
-
- void write_sorted_files (doc, ptr, nwords, pass_number, offset)
- char *doc, **ptr;
- long nwords, offset;
- int pass_number;
- {
- extern long zbufsiz;
- FILE *kfile, *pfile, *open_kfile(), *open_pfile();
- char *prev_word, *next_output_item();
- KEY_REC *outk;
- long *outp, i, file_size ();
- void create_zbuffer(), write_new_key();
-
- DEBUG ("--Entering write_sorted_files with nwords %ld\n", nwords);
- if (nwords == 0)
- return;
-
- DEBUG ("--Opening kfile & pfile for pass_number = %d\n", pass_number);
- kfile = open_kfile (pass_number);
- pfile = open_pfile (pass_number);
-
- DEBUG ("--Creating buffers for keys & ptrs, size = %ld\n", zbufsiz);
- create_zbuffer (0, zbufsiz, kfile, sizeof(KEY_REC));
- create_zbuffer (1, zbufsiz, pfile, sizeof(long));
-
- DEBUG ("--Beginning to write keys and ptrs; first key=%.28s\n", ptr[0]);
- prev_word = ptr[0];
- outk = (KEY_REC *)next_output_item (0);
- write_new_key (ptr[0], outk->kkey);
-
- for (i = 0; i < nwords; ++i)
- {
- if (is_new_word (prev_word, ptr[i]))
- {
- outk->ccount = i;
- outk = (KEY_REC *)next_output_item (0);
- write_new_key (ptr[i], outk->kkey);
- prev_word = ptr[i];
- }
- outp = (long *)next_output_item (1);
- *outp = (ptr[i] - doc) + offset;
- }
- outk->ccount = i;
-
- flush_zbuffer (0);
- flush_zbuffer (1);
-
- DEBUG ("--Getting rid of key and ptr buffers...\n", NULL);
- free_zbuffer (0);
- free_zbuffer (1);
-
- printf (" ...%ld distinct words\n",
- file_size (kfile) / sizeof(KEY_REC));
- fclose (kfile);
- fclose (pfile);
- }
-
-
/* is_new_word -- decide whether the current word w1 differs from the
 * previous word w0, in which case a fresh entry has to go out to the
 * key file kfile.
 *
 * The comparison is left entirely to zstrcmp(), which is expected to
 * compare up to the first '\0' or for at most KEY_LENGTH characters
 * (TODO confirm against zstrcmp itself).  The value zstrcmp() hands
 * back is passed along untouched: nonzero (TRUE) when the words differ,
 * zero (FALSE) when they are identical that far.
 *
 * Kept as a real function rather than a macro for the moment, for
 * safety and readability....
 */

int is_new_word (w0, w1)
    char *w0, *w1;
{
    int differs;

    differs = zstrcmp (w0, w1);
    return differs;
}
-
-
- /* function to write out a new key entry in the key_file:
- * KEY_LENGTH letters consisting of the key word (which will be found
- * delimited by a '\0'), followed by enough blanks to fill out the
- * record to total length KEY_LENGTH ...
- */
-
- void write_new_key (p, kp)
- register char *p, *kp;
- {
- register int i, c;
-
- for (i = 0; i < KEY_LENGTH; ++i)
- {
- c = *p++;
- if (c == '\0')
- break;
- *kp++ = c;
- }
-
- for ( ; i < KEY_LENGTH; ++i)
- *kp++ = ' ';
- }
-
-
-